/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    MiningSchema.java
 *    Copyright (C) 2008-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.core.pmml;

import java.io.Serializable;
import java.util.ArrayList;

import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import weka.core.Attribute;
import weka.core.Instances;

/**
 * This class encapsulates the mining schema from a PMML xml file. Specifically,
 * it contains the fields used in the PMML model as an Instances object (just
 * the header). It also contains meta information such as value ranges and how
 * to handle missing values, outliers etc.
 *
 * We also store various other PMML elements here, such as the
 * TransformationDictionary, DerivedFields and Targets (if defined). They are
 * not part of the mining schema per se, but relate to inputs used by the model
 * and it is convenient to store them here.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 * @version $Revision$
 */
public class MiningSchema implements Serializable {

    /** For serialization */
    private static final long serialVersionUID = 7144380586726330455L;

    /**
     * The structure of all the fields (both mining schema and derived) as Instances
     */
    protected Instances m_fieldInstancesStructure;

    /** Just the mining schema fields as Instances */
    protected Instances m_miningSchemaInstancesStructure;

    /** Meta information about the mining schema fields */
    protected ArrayList<MiningFieldMetaInfo> m_miningMeta = new ArrayList<MiningFieldMetaInfo>();

    /**
     * Meta information about derived fields (those defined in the
     * TransformationDictionary followed by those defined in LocalTransformations)
     */
    protected ArrayList<DerivedFieldMetaInfo> m_derivedMeta = new ArrayList<DerivedFieldMetaInfo>();

    /** The transformation dictionary (if defined) */
    protected TransformationDictionary m_transformationDictionary = null;

    /** target meta info (may be null if not defined) */
    protected TargetMetaInfo m_targetMetaInfo = null;

    private void getLocalTransformations(Element model) throws Exception {
        NodeList temp = model.getElementsByTagName("LocalTransformations");

        if (temp.getLength() > 0) {
            // should be just one LocalTransformations element
            Element localT = (Element) temp.item(0);

            // Set up some field defs to pass in
            /*
             * ArrayList<Attribute> fieldDefs = new ArrayList<Attribute>(); for (int i = 0;
             * i < m_miningSchemaInstancesStructure.numAttributes(); i++) {
             * fieldDefs.add(m_miningSchemaInstancesStructure.attribute(i)); }
             */

            NodeList localDerivedL = localT.getElementsByTagName("DerivedField");
            for (int i = 0; i < localDerivedL.getLength(); i++) {
                Node localDerived = localDerivedL.item(i);
                if (localDerived.getNodeType() == Node.ELEMENT_NODE) {
                    DerivedFieldMetaInfo d = new DerivedFieldMetaInfo((Element) localDerived, null /* fieldDefs */, m_transformationDictionary);
                    m_derivedMeta.add(d);
                }
            }
        }
    }

    /**
     * Constructor for MiningSchema.
     *
     * @param model          the <code>Element</code> encapsulating the pmml model
     * @param dataDictionary the data dictionary as an Instances object
     * @throws Exception if something goes wrong during construction of the mining
     *                   schema
     */
    public MiningSchema(Element model, Instances dataDictionary, TransformationDictionary transDict) throws Exception {

        /*
         * // First check for transformation dictionary/local transformations and
         * derived fields. // These are not supported yet. NodeList temp =
         * model.getElementsByTagName("LocalTransformations"); if (temp.getLength() > 0)
         * { throw new Exception("[MiningSchema] LocalTransformations " +
         * "are not supported yet."); }
         */

        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        NodeList fieldList = model.getElementsByTagName("MiningField");
        int classIndex = -1;
        int addedCount = 0;
        for (int i = 0; i < fieldList.getLength(); i++) {
            Node miningField = fieldList.item(i);
            if (miningField.getNodeType() == Node.ELEMENT_NODE) {
                Element miningFieldEl = (Element) miningField;

                MiningFieldMetaInfo mfi = new MiningFieldMetaInfo(miningFieldEl);

                if (mfi.getUsageType() == MiningFieldMetaInfo.Usage.ACTIVE || mfi.getUsageType() == MiningFieldMetaInfo.Usage.PREDICTED) {

                    // find this attribute in the dataDictionary
                    Attribute miningAtt = dataDictionary.attribute(mfi.getName());
                    if (miningAtt != null) {
                        mfi.setIndex(addedCount);
                        attInfo.add(miningAtt);
                        addedCount++;

                        if (mfi.getUsageType() == MiningFieldMetaInfo.Usage.PREDICTED) {
                            classIndex = addedCount - 1;
                        }

                        // add to the array list
                        m_miningMeta.add(mfi);
                    } else {
                        throw new Exception("Can't find mining field: " + mfi.getName() + " in the data dictionary.");
                    }
                }
            }
        }

        m_miningSchemaInstancesStructure = new Instances("miningSchema", attInfo, 0);

        // set these instances on the MiningFieldMetaInfos so that the
        // toString() method can operate correctly
        for (MiningFieldMetaInfo m : m_miningMeta) {
            m.setMiningSchemaInstances(m_miningSchemaInstancesStructure);
        }

        m_transformationDictionary = transDict;

        // Handle transformation dictionary and any local transformations
        if (m_transformationDictionary != null) {
            ArrayList<DerivedFieldMetaInfo> transDerived = transDict.getDerivedFields();
            m_derivedMeta.addAll(transDerived);
        }

        // Get any local transformations
        getLocalTransformations(model);

        // Set up the full instances structure: combo of mining schema fields and
        // all derived fields
        ArrayList<Attribute> newStructure = new ArrayList<Attribute>();
        for (MiningFieldMetaInfo m : m_miningMeta) {
            newStructure.add(m.getFieldAsAttribute());
        }

        for (DerivedFieldMetaInfo d : m_derivedMeta) {
            newStructure.add(d.getFieldAsAttribute());
        }
        m_fieldInstancesStructure = new Instances("FieldStructure", newStructure, 0);

        if (m_transformationDictionary != null) {
            // first update the field defs for any derived fields in the transformation
            // dictionary
            // and our complete list of derived fields, now that we have a fixed
            // ordering for the mining schema + derived attributes (i.e. could
            // be different from the order of attributes in the data dictionary that was
            // used when the transformation dictionary was initially constructed
            m_transformationDictionary.setFieldDefsForDerivedFields(m_fieldInstancesStructure);
        }

        // update the field defs for all our derived fields.
        for (DerivedFieldMetaInfo d : m_derivedMeta) {
            d.setFieldDefs(m_fieldInstancesStructure);
        }

        if (classIndex != -1) {
            m_fieldInstancesStructure.setClassIndex(classIndex);
            m_miningSchemaInstancesStructure.setClassIndex(classIndex);
        }

        // do Targets (if any)
        NodeList targetsList = model.getElementsByTagName("Targets");
        if (targetsList.getLength() > 0) {
            if (targetsList.getLength() > 1) {
                throw new Exception("[MiningSchema] Can only handle a single Target");
            } else {
                Node te = targetsList.item(0);
                if (te.getNodeType() == Node.ELEMENT_NODE) {
                    m_targetMetaInfo = new TargetMetaInfo((Element) te);

                    // fill in any necessary categorical values in the mining schema
                    // class attribute
                    if (m_fieldInstancesStructure.classIndex() >= 0 && m_fieldInstancesStructure.classAttribute().isString()) {
                        ArrayList<String> targetVals = m_targetMetaInfo.getValues();
                        if (targetVals.size() > 0) {
                            Attribute classAtt = m_fieldInstancesStructure.classAttribute();
                            for (int i = 0; i < targetVals.size(); i++) {
                                classAtt.addStringValue(targetVals.get(i));
                            }
                        }
                    }
                }
            }
        }
    }

    /**
     * Apply the missing value treatments (if any) to an incoming instance.
     *
     * @param values an array of doubles in order of the fields in the mining schema
     *               that represents the incoming instance (note: use
     *               PMMLUtils.instanceToSchema() to generate this).
     * @throws Exception if something goes wrong during missing value handling
     */
    public void applyMissingValuesTreatment(double[] values) throws Exception {
        for (int i = 0; i < m_miningMeta.size(); i++) {
            MiningFieldMetaInfo mfi = m_miningMeta.get(i);
            values[i] = mfi.applyMissingValueTreatment(values[i]);
        }
    }

    /**
     * Apply the outlier treatment methods (if any) to an incoming instance.
     *
     * @param values an array of doubles in order of the fields in the mining schema
     *               that represents the incoming instance (note: use
     *               PMMLUtils.instanceToSchema() to generate this).
     * @throws Exception if something goes wrong during outlier treatment handling
     */
    public void applyOutlierTreatment(double[] values) throws Exception {
        for (int i = 0; i < m_miningMeta.size(); i++) {
            MiningFieldMetaInfo mfi = m_miningMeta.get(i);
            values[i] = mfi.applyOutlierTreatment(values[i]);
        }
    }

    /**
     * Apply both missing and outlier treatments to an incoming instance.
     * 
     * @param values an array of doubles in order of the fields in the mining schema
     *               that represents the incoming instance (note: use
     *               MappingInfo.instanceToSchema() to generate this).
     * @throws Exception if something goes wrong during this process
     */
    public void applyMissingAndOutlierTreatments(double[] values) throws Exception {
        for (int i = 0; i < m_miningMeta.size(); i++) {
            MiningFieldMetaInfo mfi = m_miningMeta.get(i);
            values[i] = mfi.applyMissingValueTreatment(values[i]);
            values[i] = mfi.applyOutlierTreatment(values[i]);
        }
    }

    /**
     * Get the all the fields (both mining schema and derived) as Instances.
     * Attributes are in order of those in the mining schema, followed by derived
     * attributes from the TransformationDictionary followed by derived attributes
     * from LocalTransformations.
     *
     * @return all the fields as an Instances object
     */
    public Instances getFieldsAsInstances() {
        return m_fieldInstancesStructure;
    }

    /**
     * Get the mining schema fields as an Instances object.
     * 
     * @return the mining schema fields as an Instances object.
     */
    public Instances getMiningSchemaAsInstances() {
        return m_miningSchemaInstancesStructure;
    }

    /**
     * Get the transformation dictionary .
     * 
     * @return the transformation dictionary or null if none is defined.
     */
    public TransformationDictionary getTransformationDictionary() {
        return m_transformationDictionary;
    }

    /**
     * Returns true if there is Target meta data.
     *
     * @return true if there is Target meta data
     */
    public boolean hasTargetMetaData() {
        return (m_targetMetaInfo != null);
    }

    /**
     * Get the Target meta data.
     *
     * @return the Target meta data
     */
    public TargetMetaInfo getTargetMetaData() {
        return m_targetMetaInfo;
    }

    /**
     * Method to convert any string attributes in the mining schema Instances to
     * nominal attributes. This may be necessary if there are no Value elements
     * defined for categorical fields in the data dictionary. In this case, elements
     * in the actual model definition will probably reveal the valid values for
     * categorical fields.
     */
    public void convertStringAttsToNominal() {
        Instances miningSchemaI = getFieldsAsInstances();
        if (miningSchemaI.checkForStringAttributes()) {
            ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
            for (int i = 0; i < miningSchemaI.numAttributes(); i++) {
                Attribute tempA = miningSchemaI.attribute(i);
                if (tempA.isString()) {
                    ArrayList<String> valueVector = new ArrayList<String>();
                    for (int j = 0; j < tempA.numValues(); j++) {
                        valueVector.add(tempA.value(j));
                    }
                    Attribute newAtt = new Attribute(tempA.name(), valueVector);
                    attInfo.add(newAtt);
                } else {
                    attInfo.add(tempA);
                }
            }
            Instances newI = new Instances("miningSchema", attInfo, 0);
            if (m_fieldInstancesStructure.classIndex() >= 0) {
                newI.setClassIndex(m_fieldInstancesStructure.classIndex());
            }
            m_fieldInstancesStructure = newI;

            /*
             * StringToNominal stn = new StringToNominal();
             * stn.setInputFormat(miningSchemaI); Instances newI =
             * Filter.useFilter(miningSchemaI, stn); m_miningSchema = newI;
             */
        }
    }

    /**
     * Convert a numeric attribute in the mining schema to nominal.
     * 
     * @param index   the index of the attribute to convert
     * @param newVals an ArrayList of the values of the nominal attribute
     */
    public void convertNumericAttToNominal(int index, ArrayList<String> newVals) {
        Instances miningSchemaI = getFieldsAsInstances();
        if (miningSchemaI.attribute(index).isNominal()) {
            throw new IllegalArgumentException("[MiningSchema] convertNumericAttToNominal: attribute is " + "already nominal!");
        }

        ArrayList<String> newValues = new ArrayList<String>();
        for (int i = 0; i < newVals.size(); i++) {
            newValues.add(newVals.get(i));
        }

        ArrayList<Attribute> attInfo = new ArrayList<Attribute>();
        for (int i = 0; i < miningSchemaI.numAttributes(); i++) {
            Attribute tempA = miningSchemaI.attribute(i);
            if (i == index) {
                Attribute newAtt = new Attribute(tempA.name(), newValues);
                attInfo.add(newAtt);
            } else {
                attInfo.add(tempA);
            }
        }

        Instances newI = new Instances("miningSchema", attInfo, 0);
        if (m_fieldInstancesStructure.classIndex() >= 0) {
            newI.setClassIndex(m_fieldInstancesStructure.classIndex());
        }
        m_fieldInstancesStructure = newI;
    }

    public ArrayList<DerivedFieldMetaInfo> getDerivedFields() {
        return m_derivedMeta;
    }

    public ArrayList<MiningFieldMetaInfo> getMiningFields() {
        return m_miningMeta;
    }

    /**
     * Get a textual description of the mining schema.
     *
     * @return a textual description of the mining schema
     */
    public String toString() {
        StringBuffer temp = new StringBuffer();

        if (m_transformationDictionary != null) {
            temp.append(m_transformationDictionary);
        }

        temp.append("Mining schema:\n\n");
        for (MiningFieldMetaInfo m : m_miningMeta) {
            temp.append(m + "\n");
        }

        if (m_derivedMeta.size() > 0) {
            temp.append("\nDerived fields:\n\n");
            for (DerivedFieldMetaInfo d : m_derivedMeta) {
                temp.append(d + "\n");
            }
        }
        temp.append("\n");
        return temp.toString();
    }
}
